{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 312,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "import json\n",
    "import demjson\n",
    "from time import sleep\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://api.bilibili.com/x/v3/fav/folder/created/list-all?up_mid={uid}\"\n",
    "header = {\"User-Agents\" : \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36 Edg/83.0.478.54\"}\n",
    "crawler = requests.Session(\n",
    "crawler.headrs = header\n",
    "uid = 94649037"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "json_data = crawler.get(url=url.format(uid = uid)).json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 327,
   "metadata": {},
   "outputs": [],
   "source": [
    "my_likes = [{data['title'] : [data['id'], data['media_count']]} for data in json_data['data']['list'] if data['mid'] == uid and '学习' not in data['title'] and '默认' not in data['title']]\n",
    "file = open('./data.txt', 'a', encoding = 'utf-8')\n",
    "media_url = 'https://api.bilibili.com/x/v3/fav/resource/list?media_id={media_id}&pn={page}&ps=20'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "**************************粪坑10**************************\n",
      "粪坑10 page: 1 done...\n",
      "粪坑10 page: 2 done...\n",
      "粪坑10 page: 3 done...\n",
      "粪坑10 page: 4 done...\n",
      "粪坑10 page: 5 done...\n",
      "粪坑10 page: 6 done...\n",
      "粪坑10 page: 7 done...\n",
      "粪坑10 page: 8 done...\n",
      "粪坑10 page: 9 done...\n",
      "粪坑10 page: 10 done...\n",
      "粪坑10 page: 11 done...\n",
      "粪坑10 page: 12 done...\n",
      "粪坑10 page: 13 done...\n",
      "粪坑10 page: 14 done...\n",
      "粪坑10 page: 15 done...\n",
      "粪坑10 page: 16 done...\n",
      "粪坑10 page: 17 done...\n",
      "粪坑10 page: 18 done...\n",
      "粪坑10 page: 19 done...\n",
      "粪坑10 page: 20 done...\n",
      "粪坑10 page: 21 done...\n",
      "粪坑10 page: 22 done...\n",
      "粪坑10 page: 23 done...\n",
      "粪坑10 page: 24 done...\n",
      "粪坑10 page: 25 done...\n",
      "粪坑10 page: 26 done...\n",
      "粪坑10 page: 27 done...\n",
      "粪坑10 page: 28 done...\n",
      "粪坑10 page: 29 done...\n",
      "粪坑10 page: 30 done...\n",
      "粪坑10 page: 31 done...\n",
      "粪坑10 page: 32 done...\n",
      "粪坑10 page: 33 done...\n",
      "**************************素材2**************************\n",
      "素材2 page: 1 done...\n",
      "素材2 page: 2 done...\n",
      "素材2 page: 3 done...\n",
      "素材2 page: 4 done...\n",
      "素材2 page: 5 done...\n",
      "素材2 page: 6 done...\n",
      "素材2 page: 7 done...\n",
      "素材2 page: 8 done...\n",
      "素材2 page: 9 done...\n",
      "素材2 page: 10 done...\n",
      "素材2 page: 11 done...\n",
      "素材2 page: 12 done...\n",
      "素材2 page: 13 done...\n",
      "素材2 page: 14 done...\n",
      "素材2 page: 15 done...\n",
      "素材2 page: 16 done...\n",
      "素材2 page: 17 done...\n",
      "素材2 page: 18 done...\n",
      "素材2 page: 19 done...\n",
      "素材2 page: 20 done...\n",
      "素材2 page: 21 done...\n",
      "素材2 page: 22 done...\n",
      "素材2 page: 23 done...\n",
      "素材2 page: 24 done...\n",
      "素材2 page: 25 done...\n",
      "素材2 page: 26 done...\n",
      "素材2 page: 27 done...\n",
      "**************************粪坑9**************************\n",
      "粪坑9 page: 1 done...\n",
      "粪坑9 page: 2 done...\n",
      "粪坑9 page: 3 done...\n",
      "粪坑9 page: 4 done...\n",
      "粪坑9 page: 5 done...\n",
      "粪坑9 page: 6 done...\n",
      "粪坑9 page: 7 done...\n",
      "粪坑9 page: 8 done...\n",
      "粪坑9 page: 9 done...\n",
      "粪坑9 page: 10 done...\n",
      "粪坑9 page: 11 done...\n",
      "粪坑9 page: 12 done...\n",
      "粪坑9 page: 13 done...\n",
      "粪坑9 page: 14 done...\n",
      "粪坑9 page: 15 done...\n",
      "粪坑9 page: 16 done...\n",
      "粪坑9 page: 17 done...\n",
      "粪坑9 page: 18 done...\n",
      "粪坑9 page: 19 done...\n",
      "粪坑9 page: 20 done...\n",
      "粪坑9 page: 21 done...\n",
      "粪坑9 page: 22 done...\n",
      "粪坑9 page: 23 done...\n",
      "粪坑9 page: 24 done...\n",
      "粪坑9 page: 25 done...\n",
      "粪坑9 page: 26 done...\n",
      "粪坑9 page: 27 done...\n",
      "粪坑9 page: 28 done...\n",
      "粪坑9 page: 29 done...\n",
      "粪坑9 page: 30 done...\n",
      "粪坑9 page: 31 done...\n",
      "粪坑9 page: 32 done...\n",
      "粪坑9 page: 33 done...\n",
      "粪坑9 page: 34 done...\n",
      "粪坑9 page: 35 done...\n",
      "粪坑9 page: 36 done...\n",
      "粪坑9 page: 37 done...\n",
      "粪坑9 page: 38 done...\n",
      "粪坑9 page: 39 done...\n",
      "粪坑9 page: 40 done...\n",
      "粪坑9 page: 41 done...\n",
      "粪坑9 page: 42 done...\n",
      "粪坑9 page: 43 done...\n",
      "粪坑9 page: 44 done...\n",
      "粪坑9 page: 45 done...\n",
      "粪坑9 page: 46 done...\n",
      "粪坑9 page: 47 done...\n",
      "粪坑9 page: 48 done...\n",
      "粪坑9 page: 49 done...\n",
      "粪坑9 page: 50 done...\n",
      "**************************粪坑8**************************\n",
      "粪坑8 page: 1 done...\n",
      "粪坑8 page: 2 done...\n",
      "粪坑8 page: 3 done...\n",
      "粪坑8 page: 4 done...\n",
      "粪坑8 page: 5 done...\n",
      "粪坑8 page: 6 done...\n",
      "粪坑8 page: 7 done...\n",
      "粪坑8 page: 8 done...\n",
      "粪坑8 page: 9 done...\n",
      "粪坑8 page: 10 done...\n",
      "粪坑8 page: 11 done...\n",
      "粪坑8 page: 12 done...\n",
      "粪坑8 page: 13 done...\n",
      "粪坑8 page: 14 done...\n",
      "粪坑8 page: 15 done...\n",
      "粪坑8 page: 16 done...\n",
      "粪坑8 page: 17 done...\n",
      "粪坑8 page: 18 done...\n",
      "粪坑8 page: 19 done...\n",
      "粪坑8 page: 20 done...\n",
      "粪坑8 page: 21 done...\n",
      "粪坑8 page: 22 done...\n",
      "粪坑8 page: 23 done...\n",
      "粪坑8 page: 24 done...\n",
      "粪坑8 page: 25 done...\n",
      "粪坑8 page: 26 done...\n",
      "粪坑8 page: 27 done...\n",
      "粪坑8 page: 28 done...\n",
      "粪坑8 page: 29 done...\n",
      "粪坑8 page: 30 done...\n",
      "粪坑8 page: 31 done...\n",
      "粪坑8 page: 32 done...\n",
      "粪坑8 page: 33 done...\n",
      "粪坑8 page: 34 done...\n",
      "粪坑8 page: 35 done...\n",
      "粪坑8 page: 36 done...\n",
      "粪坑8 page: 37 done...\n",
      "粪坑8 page: 38 done...\n",
      "粪坑8 page: 39 done...\n",
      "粪坑8 page: 40 done...\n",
      "粪坑8 page: 41 done...\n",
      "粪坑8 page: 42 done...\n",
      "粪坑8 page: 43 done...\n",
      "粪坑8 page: 44 done...\n",
      "粪坑8 page: 45 done...\n",
      "粪坑8 page: 46 done...\n",
      "粪坑8 page: 47 done...\n",
      "粪坑8 page: 48 done...\n",
      "粪坑8 page: 49 done...\n",
      "粪坑8 page: 50 done...\n",
      "**************************粪坑7**************************\n",
      "粪坑7 page: 1 done...\n",
      "粪坑7 page: 2 done...\n",
      "粪坑7 page: 3 done...\n",
      "粪坑7 page: 4 done...\n",
      "粪坑7 page: 5 done...\n",
      "粪坑7 page: 6 done...\n",
      "粪坑7 page: 7 done...\n",
      "粪坑7 page: 8 done...\n",
      "粪坑7 page: 9 done...\n",
      "粪坑7 page: 10 done...\n",
      "粪坑7 page: 11 done...\n",
      "粪坑7 page: 12 done...\n",
      "粪坑7 page: 13 done...\n",
      "粪坑7 page: 14 done...\n",
      "粪坑7 page: 15 done...\n",
      "粪坑7 page: 16 done...\n",
      "粪坑7 page: 17 done...\n",
      "粪坑7 page: 18 done...\n",
      "粪坑7 page: 19 done...\n",
      "粪坑7 page: 20 done...\n",
      "粪坑7 page: 21 done...\n",
      "粪坑7 page: 22 done...\n",
      "粪坑7 page: 23 done...\n",
      "粪坑7 page: 24 done...\n",
      "粪坑7 page: 25 done...\n",
      "粪坑7 page: 26 done...\n",
      "粪坑7 page: 27 done...\n",
      "粪坑7 page: 28 done...\n",
      "粪坑7 page: 29 done...\n",
      "粪坑7 page: 30 done...\n",
      "粪坑7 page: 31 done...\n",
      "粪坑7 page: 32 done...\n",
      "粪坑7 page: 33 done...\n",
      "粪坑7 page: 34 done...\n",
      "粪坑7 page: 35 done...\n",
      "粪坑7 page: 36 done...\n",
      "粪坑7 page: 37 done...\n",
      "粪坑7 page: 38 done...\n",
      "粪坑7 page: 39 done...\n",
      "粪坑7 page: 40 done...\n",
      "粪坑7 page: 41 done...\n",
      "粪坑7 page: 42 done...\n",
      "粪坑7 page: 43 done...\n",
      "粪坑7 page: 44 done...\n",
      "粪坑7 page: 45 done...\n",
      "粪坑7 page: 46 done...\n",
      "粪坑7 page: 47 done...\n",
      "粪坑7 page: 48 done...\n",
      "粪坑7 page: 49 done...\n",
      "粪坑7 page: 50 done...\n",
      "**************************粪坑6**************************\n",
      "粪坑6 page: 1 done...\n",
      "粪坑6 page: 2 done...\n",
      "粪坑6 page: 3 done...\n",
      "粪坑6 page: 4 done...\n",
      "粪坑6 page: 5 done...\n",
      "粪坑6 page: 6 done...\n",
      "粪坑6 page: 7 done...\n",
      "粪坑6 page: 8 done...\n",
      "粪坑6 page: 9 done...\n",
      "粪坑6 page: 10 done...\n",
      "粪坑6 page: 11 done...\n",
      "粪坑6 page: 12 done...\n",
      "粪坑6 page: 13 done...\n",
      "粪坑6 page: 14 done...\n",
      "粪坑6 page: 15 done...\n",
      "粪坑6 page: 16 done...\n",
      "粪坑6 page: 17 done...\n",
      "粪坑6 page: 18 done...\n",
      "粪坑6 page: 19 done...\n",
      "粪坑6 page: 20 done...\n",
      "粪坑6 page: 21 done...\n",
      "粪坑6 page: 22 done...\n",
      "粪坑6 page: 23 done...\n",
      "粪坑6 page: 24 done...\n",
      "粪坑6 page: 25 done...\n",
      "粪坑6 page: 26 done...\n",
      "粪坑6 page: 27 done...\n",
      "粪坑6 page: 28 done...\n",
      "粪坑6 page: 29 done...\n",
      "粪坑6 page: 30 done...\n",
      "粪坑6 page: 31 done...\n",
      "粪坑6 page: 32 done...\n",
      "粪坑6 page: 33 done...\n",
      "粪坑6 page: 34 done...\n",
      "粪坑6 page: 35 done...\n",
      "粪坑6 page: 36 done...\n",
      "粪坑6 page: 37 done...\n",
      "粪坑6 page: 38 done...\n",
      "粪坑6 page: 39 done...\n",
      "粪坑6 page: 40 done...\n",
      "粪坑6 page: 41 done...\n",
      "粪坑6 page: 42 done...\n",
      "粪坑6 page: 43 done...\n",
      "粪坑6 page: 44 done...\n",
      "粪坑6 page: 45 done...\n",
      "粪坑6 page: 46 done...\n",
      "粪坑6 page: 47 done...\n",
      "粪坑6 page: 48 done...\n",
      "粪坑6 page: 49 done...\n",
      "粪坑6 page: 50 done...\n",
      "**************************粪坑5**************************\n",
      "粪坑5 page: 1 done...\n",
      "粪坑5 page: 2 done...\n",
      "粪坑5 page: 3 done...\n",
      "粪坑5 page: 4 done...\n",
      "粪坑5 page: 5 done...\n",
      "粪坑5 page: 6 done...\n",
      "粪坑5 page: 7 done...\n",
      "粪坑5 page: 8 done...\n",
      "粪坑5 page: 9 done...\n",
      "粪坑5 page: 10 done...\n",
      "粪坑5 page: 11 done...\n",
      "粪坑5 page: 12 done...\n",
      "粪坑5 page: 13 done...\n",
      "粪坑5 page: 14 done...\n",
      "粪坑5 page: 15 done...\n",
      "粪坑5 page: 16 done...\n",
      "粪坑5 page: 17 done...\n",
      "粪坑5 page: 18 done...\n",
      "粪坑5 page: 19 done...\n",
      "粪坑5 page: 20 done...\n",
      "粪坑5 page: 21 done...\n",
      "粪坑5 page: 22 done...\n",
      "粪坑5 page: 23 done...\n",
      "粪坑5 page: 24 done...\n",
      "粪坑5 page: 25 done...\n",
      "粪坑5 page: 26 done...\n",
      "粪坑5 page: 27 done...\n",
      "粪坑5 page: 28 done...\n",
      "粪坑5 page: 29 done...\n",
      "粪坑5 page: 30 done...\n",
      "粪坑5 page: 31 done...\n",
      "粪坑5 page: 32 done...\n",
      "粪坑5 page: 33 done...\n",
      "粪坑5 page: 34 done...\n",
      "粪坑5 page: 35 done...\n",
      "粪坑5 page: 36 done...\n",
      "粪坑5 page: 37 done...\n",
      "粪坑5 page: 38 done...\n",
      "粪坑5 page: 39 done...\n",
      "粪坑5 page: 40 done...\n",
      "粪坑5 page: 41 done...\n",
      "粪坑5 page: 42 done...\n",
      "粪坑5 page: 43 done...\n",
      "粪坑5 page: 44 done...\n",
      "粪坑5 page: 45 done...\n",
      "粪坑5 page: 46 done...\n",
      "粪坑5 page: 47 done...\n",
      "粪坑5 page: 48 done...\n",
      "粪坑5 page: 49 done...\n",
      "粪坑5 page: 50 done...\n",
      "**************************粪坑4**************************\n",
      "粪坑4 page: 1 done...\n",
      "粪坑4 page: 2 done...\n",
      "粪坑4 page: 3 done...\n",
      "粪坑4 page: 4 done...\n",
      "粪坑4 page: 5 done...\n",
      "粪坑4 page: 6 done...\n",
      "粪坑4 page: 7 done...\n",
      "粪坑4 page: 8 done...\n",
      "粪坑4 page: 9 done...\n",
      "粪坑4 page: 10 done...\n",
      "粪坑4 page: 11 done...\n",
      "粪坑4 page: 12 done...\n",
      "粪坑4 page: 13 done...\n",
      "粪坑4 page: 14 done...\n",
      "粪坑4 page: 15 done...\n",
      "粪坑4 page: 16 done...\n",
      "粪坑4 page: 17 done...\n",
      "粪坑4 page: 18 done...\n",
      "粪坑4 page: 19 done...\n",
      "粪坑4 page: 20 done...\n",
      "粪坑4 page: 21 done...\n",
      "粪坑4 page: 22 done...\n",
      "粪坑4 page: 23 done...\n",
      "粪坑4 page: 24 done...\n",
      "粪坑4 page: 25 done...\n",
      "粪坑4 page: 26 done...\n",
      "粪坑4 page: 27 done...\n",
      "粪坑4 page: 28 done...\n",
      "粪坑4 page: 29 done...\n",
      "粪坑4 page: 30 done...\n",
      "粪坑4 page: 31 done...\n",
      "粪坑4 page: 32 done...\n",
      "粪坑4 page: 33 done...\n",
      "粪坑4 page: 34 done...\n",
      "粪坑4 page: 35 done...\n",
      "粪坑4 page: 36 done...\n",
      "粪坑4 page: 37 done...\n",
      "粪坑4 page: 38 done...\n",
      "粪坑4 page: 39 done...\n",
      "粪坑4 page: 40 done...\n",
      "粪坑4 page: 41 done...\n",
      "粪坑4 page: 42 done...\n",
      "粪坑4 page: 43 done...\n",
      "粪坑4 page: 44 done...\n",
      "粪坑4 page: 45 done...\n",
      "粪坑4 page: 46 done...\n",
      "粪坑4 page: 47 done...\n",
      "粪坑4 page: 48 done...\n",
      "粪坑4 page: 49 done...\n",
      "粪坑4 page: 50 done...\n",
      "**************************粪坑3**************************\n",
      "粪坑3 page: 1 done...\n",
      "粪坑3 page: 2 done...\n",
      "粪坑3 page: 3 done...\n",
      "粪坑3 page: 4 done...\n",
      "粪坑3 page: 5 done...\n",
      "粪坑3 page: 6 done...\n",
      "粪坑3 page: 7 done...\n",
      "粪坑3 page: 8 done...\n",
      "粪坑3 page: 9 done...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "粪坑3 page: 10 done...\n",
      "粪坑3 page: 11 done...\n",
      "粪坑3 page: 12 done...\n",
      "粪坑3 page: 13 done...\n",
      "粪坑3 page: 14 done...\n",
      "粪坑3 page: 15 done...\n",
      "粪坑3 page: 16 done...\n",
      "粪坑3 page: 17 done...\n",
      "粪坑3 page: 18 done...\n",
      "粪坑3 page: 19 done...\n",
      "粪坑3 page: 20 done...\n",
      "粪坑3 page: 21 done...\n",
      "粪坑3 page: 22 done...\n",
      "粪坑3 page: 23 done...\n",
      "粪坑3 page: 24 done...\n",
      "粪坑3 page: 25 done...\n",
      "粪坑3 page: 26 done...\n",
      "粪坑3 page: 27 done...\n",
      "粪坑3 page: 28 done...\n",
      "粪坑3 page: 29 done...\n",
      "粪坑3 page: 30 done...\n",
      "粪坑3 page: 31 done...\n",
      "粪坑3 page: 32 done...\n",
      "粪坑3 page: 33 done...\n",
      "粪坑3 page: 34 done...\n",
      "粪坑3 page: 35 done...\n",
      "粪坑3 page: 36 done...\n",
      "粪坑3 page: 37 done...\n",
      "粪坑3 page: 38 done...\n",
      "粪坑3 page: 39 done...\n",
      "粪坑3 page: 40 done...\n",
      "粪坑3 page: 41 done...\n",
      "粪坑3 page: 42 done...\n",
      "粪坑3 page: 43 done...\n",
      "粪坑3 page: 44 done...\n",
      "粪坑3 page: 45 done...\n",
      "粪坑3 page: 46 done...\n",
      "粪坑3 page: 47 done...\n",
      "粪坑3 page: 48 done...\n",
      "粪坑3 page: 49 done...\n",
      "粪坑3 page: 50 done...\n",
      "**************************粪坑2**************************\n",
      "粪坑2 page: 1 done...\n",
      "粪坑2 page: 2 done...\n",
      "粪坑2 page: 3 done...\n",
      "粪坑2 page: 4 done...\n",
      "粪坑2 page: 5 done...\n",
      "粪坑2 page: 6 done...\n",
      "粪坑2 page: 7 done...\n",
      "粪坑2 page: 8 done...\n",
      "粪坑2 page: 9 done...\n",
      "粪坑2 page: 10 done...\n",
      "粪坑2 page: 11 done...\n",
      "粪坑2 page: 12 done...\n",
      "粪坑2 page: 13 done...\n",
      "粪坑2 page: 14 done...\n",
      "粪坑2 page: 15 done...\n",
      "粪坑2 page: 16 done...\n",
      "粪坑2 page: 17 done...\n",
      "粪坑2 page: 18 done...\n",
      "粪坑2 page: 19 done...\n",
      "粪坑2 page: 20 done...\n",
      "粪坑2 page: 21 done...\n",
      "粪坑2 page: 22 done...\n",
      "粪坑2 page: 23 done...\n",
      "粪坑2 page: 24 done...\n",
      "粪坑2 page: 25 done...\n",
      "粪坑2 page: 26 done...\n",
      "粪坑2 page: 27 done...\n",
      "粪坑2 page: 28 done...\n",
      "粪坑2 page: 29 done...\n",
      "粪坑2 page: 30 done...\n",
      "粪坑2 page: 31 done...\n",
      "粪坑2 page: 32 done...\n",
      "粪坑2 page: 33 done...\n",
      "粪坑2 page: 34 done...\n",
      "粪坑2 page: 35 done...\n",
      "粪坑2 page: 36 done...\n",
      "粪坑2 page: 37 done...\n",
      "粪坑2 page: 38 done...\n",
      "粪坑2 page: 39 done...\n",
      "粪坑2 page: 40 done...\n",
      "粪坑2 page: 41 done...\n",
      "粪坑2 page: 42 done...\n",
      "粪坑2 page: 43 done...\n",
      "粪坑2 page: 44 done...\n",
      "粪坑2 page: 45 done...\n",
      "粪坑2 page: 46 done...\n",
      "粪坑2 page: 47 done...\n",
      "粪坑2 page: 48 done...\n",
      "粪坑2 page: 49 done...\n",
      "粪坑2 page: 50 done...\n",
      "**************************粪坑1**************************\n",
      "粪坑1 page: 1 done...\n",
      "粪坑1 page: 2 done...\n",
      "粪坑1 page: 3 done...\n",
      "粪坑1 page: 4 done...\n",
      "粪坑1 page: 5 done...\n",
      "粪坑1 page: 6 done...\n",
      "粪坑1 page: 7 done...\n",
      "粪坑1 page: 8 done...\n",
      "粪坑1 page: 9 done...\n",
      "粪坑1 page: 10 done...\n",
      "粪坑1 page: 11 done...\n",
      "粪坑1 page: 12 done...\n",
      "粪坑1 page: 13 done...\n",
      "粪坑1 page: 14 done...\n",
      "粪坑1 page: 15 done...\n",
      "粪坑1 page: 16 done...\n",
      "粪坑1 page: 17 done...\n",
      "粪坑1 page: 18 done...\n",
      "粪坑1 page: 19 done...\n",
      "粪坑1 page: 20 done...\n",
      "粪坑1 page: 21 done...\n",
      "粪坑1 page: 22 done...\n",
      "粪坑1 page: 23 done...\n",
      "粪坑1 page: 24 done...\n",
      "粪坑1 page: 25 done...\n",
      "粪坑1 page: 26 done...\n",
      "粪坑1 page: 27 done...\n",
      "粪坑1 page: 28 done...\n",
      "粪坑1 page: 29 done...\n",
      "粪坑1 page: 30 done...\n",
      "粪坑1 page: 31 done...\n",
      "粪坑1 page: 32 done...\n",
      "粪坑1 page: 33 done...\n",
      "粪坑1 page: 34 done...\n",
      "粪坑1 page: 35 done...\n",
      "粪坑1 page: 36 done...\n",
      "粪坑1 page: 37 done...\n",
      "粪坑1 page: 38 done...\n",
      "粪坑1 page: 39 done...\n",
      "粪坑1 page: 40 done...\n",
      "粪坑1 page: 41 done...\n",
      "粪坑1 page: 42 done...\n",
      "粪坑1 page: 43 done...\n",
      "粪坑1 page: 44 done...\n",
      "粪坑1 page: 45 done...\n",
      "粪坑1 page: 46 done...\n",
      "粪坑1 page: 47 done...\n",
      "粪坑1 page: 48 done...\n",
      "粪坑1 page: 49 done...\n",
      "粪坑1 page: 50 done...\n",
      "**************************粪坑0**************************\n",
      "粪坑0 page: 1 done...\n",
      "粪坑0 page: 2 done...\n",
      "粪坑0 page: 3 done...\n",
      "粪坑0 page: 4 done...\n",
      "粪坑0 page: 5 done...\n",
      "粪坑0 page: 6 done...\n",
      "粪坑0 page: 7 done...\n",
      "粪坑0 page: 8 done...\n",
      "粪坑0 page: 9 done...\n",
      "粪坑0 page: 10 done...\n",
      "粪坑0 page: 11 done...\n",
      "粪坑0 page: 12 done...\n",
      "粪坑0 page: 13 done...\n",
      "粪坑0 page: 14 done...\n",
      "粪坑0 page: 15 done...\n",
      "粪坑0 page: 16 done...\n",
      "粪坑0 page: 17 done...\n",
      "粪坑0 page: 18 done...\n",
      "粪坑0 page: 19 done...\n",
      "粪坑0 page: 20 done...\n",
      "粪坑0 page: 21 done...\n",
      "粪坑0 page: 22 done...\n",
      "粪坑0 page: 23 done...\n",
      "粪坑0 page: 24 done...\n",
      "粪坑0 page: 25 done...\n",
      "粪坑0 page: 26 done...\n",
      "粪坑0 page: 27 done...\n",
      "粪坑0 page: 28 done...\n",
      "粪坑0 page: 29 done...\n",
      "粪坑0 page: 30 done...\n",
      "粪坑0 page: 31 done...\n",
      "粪坑0 page: 32 done...\n",
      "粪坑0 page: 33 done...\n",
      "粪坑0 page: 34 done...\n",
      "粪坑0 page: 35 done...\n",
      "粪坑0 page: 36 done...\n",
      "粪坑0 page: 37 done...\n",
      "粪坑0 page: 38 done...\n",
      "粪坑0 page: 39 done...\n",
      "粪坑0 page: 40 done...\n",
      "粪坑0 page: 41 done...\n",
      "粪坑0 page: 42 done...\n",
      "粪坑0 page: 43 done...\n",
      "粪坑0 page: 44 done...\n",
      "粪坑0 page: 45 done...\n",
      "粪坑0 page: 46 done...\n",
      "粪坑0 page: 47 done...\n",
      "**************************素材1**************************\n",
      "素材1 page: 1 done...\n",
      "素材1 page: 2 done...\n",
      "素材1 page: 3 done...\n",
      "素材1 page: 4 done...\n",
      "素材1 page: 5 done...\n",
      "素材1 page: 6 done...\n",
      "素材1 page: 7 done...\n",
      "素材1 page: 8 done...\n",
      "素材1 page: 9 done...\n",
      "素材1 page: 10 done...\n",
      "素材1 page: 11 done...\n",
      "素材1 page: 12 done...\n",
      "素材1 page: 13 done...\n",
      "素材1 page: 14 done...\n",
      "素材1 page: 15 done...\n",
      "素材1 page: 16 done...\n",
      "素材1 page: 17 done...\n",
      "素材1 page: 18 done...\n",
      "素材1 page: 19 done...\n",
      "素材1 page: 20 done...\n",
      "素材1 page: 21 done...\n",
      "素材1 page: 22 done...\n",
      "素材1 page: 23 done...\n",
      "素材1 page: 24 done...\n",
      "素材1 page: 25 done...\n",
      "素材1 page: 26 done...\n",
      "素材1 page: 27 done...\n",
      "素材1 page: 28 done...\n",
      "素材1 page: 29 done...\n",
      "素材1 page: 30 done...\n",
      "素材1 page: 31 done...\n",
      "素材1 page: 32 done...\n",
      "素材1 page: 33 done...\n",
      "素材1 page: 34 done...\n",
      "素材1 page: 35 done...\n",
      "素材1 page: 36 done...\n",
      "素材1 page: 37 done...\n",
      "素材1 page: 38 done...\n",
      "素材1 page: 39 done...\n",
      "素材1 page: 40 done...\n",
      "素材1 page: 41 done...\n",
      "素材1 page: 42 done...\n",
      "素材1 page: 43 done...\n",
      "素材1 page: 44 done...\n",
      "素材1 page: 45 done...\n",
      "素材1 page: 46 done...\n",
      "素材1 page: 47 done...\n",
      "素材1 page: 48 done...\n",
      "素材1 page: 49 done...\n",
      "素材1 page: 50 done...\n",
      "**************************素材0**************************\n",
      "素材0 page: 1 done...\n",
      "素材0 page: 2 done...\n",
      "素材0 page: 3 done...\n",
      "素材0 page: 4 done...\n",
      "素材0 page: 5 done...\n",
      "素材0 page: 6 done...\n",
      "素材0 page: 7 done...\n",
      "素材0 page: 8 done...\n",
      "素材0 page: 9 done...\n",
      "素材0 page: 10 done...\n",
      "素材0 page: 11 done...\n",
      "素材0 page: 12 done...\n",
      "素材0 page: 13 done...\n",
      "素材0 page: 14 done...\n",
      "素材0 page: 15 done...\n",
      "素材0 page: 16 done...\n",
      "素材0 page: 17 done...\n",
      "素材0 page: 18 done...\n",
      "素材0 page: 19 done...\n",
      "素材0 page: 20 done...\n",
      "素材0 page: 21 done...\n",
      "素材0 page: 22 done...\n",
      "素材0 page: 23 done...\n",
      "素材0 page: 24 done...\n",
      "素材0 page: 25 done...\n",
      "素材0 page: 26 done...\n",
      "素材0 page: 27 done...\n",
      "素材0 page: 28 done...\n",
      "素材0 page: 29 done...\n",
      "素材0 page: 30 done...\n",
      "素材0 page: 31 done...\n",
      "素材0 page: 32 done...\n",
      "素材0 page: 33 done...\n",
      "素材0 page: 34 done...\n",
      "素材0 page: 35 done...\n",
      "素材0 page: 36 done...\n",
      "素材0 page: 37 done...\n",
      "素材0 page: 38 done...\n",
      "素材0 page: 39 done...\n",
      "素材0 page: 40 done...\n",
      "素材0 page: 41 done...\n",
      "素材0 page: 42 done...\n",
      "素材0 page: 43 done...\n",
      "素材0 page: 44 done...\n",
      "素材0 page: 45 done...\n",
      "素材0 page: 46 done...\n",
      "素材0 page: 47 done...\n",
      "素材0 page: 48 done...\n",
      "素材0 page: 49 done...\n",
      "**************************声剧**************************\n",
      "声剧 page: 1 done...\n",
      "声剧 page: 2 done...\n",
      "声剧 page: 3 done...\n",
      "声剧 page: 4 done...\n"
     ]
    }
   ],
   "source": [
    "# 每个收藏夹想要的数据：标题，封面，简介，搬运工的昵称等，想了下还是想本地化再清洗好了，这样不仅方便，而且信息可以全都保留下来\n",
    "# 感觉如果直接用json会爆内存，得用txt了，那就得简单格式化下数据了\n",
    "try:\n",
    "    for media in my_likes:\n",
    "        media_name = list(media.keys())[0]\n",
    "        # 上面搞到的是一个list包含了若干了dict，这里先得把dict搞出来，然后获取它的键，再利用键取到收藏夹id（也可以直接用items方法）\n",
    "        media_id = media[media_name][0]\n",
    "        # res拿到的是收藏夹pages这一页的数据，如果这一页没有数据，midia这个属性就为null\n",
    "        pages = 1\n",
    "        print('**************************' + media_name + '**************************')\n",
    "        file.writelines('**************************' + media_name + '**************************\\n')\n",
    "        # 这层循环爬的一个收藏夹的所有视频，上一个循环爬所有收藏夹\n",
    "        while True:\n",
    "            res = crawler.get(media_url.format(media_id = media_id, page = pages)).json()\n",
    "            if not res['data']['medias']: break\n",
    "            # 小问题，json键得用\"，而不是'，这里画蛇添足了，改成双引号可能有的视频简介里有双引号，就会产生错误，所以直接用单引号就行\n",
    "            meida_list = res['data']['medias']\n",
    "            # 拿到的一页数据是以列表形式来的，故此这里得把这些元素拿出来，然后转成Json能解析的双引号再写到文件里\n",
    "            for ml in meida_list:\n",
    "                # file.writelines(str(ml).replace(\"'\", \"\\\"\") + '\\n')\n",
    "                file.writelines(str(ml) + '\\n')\n",
    "            print(media_name + \" page: \" + str(pages) + ' done...')\n",
    "            # print(\"link: \" + media_url.format(media_id = media_id, page = pages))\n",
    "            pages += 1\n",
    "            file.flush()\n",
    "            # 1.25秒一页，一个满的收藏夹大概要1分钟\n",
    "            sleep(1.25)\n",
    "finally:\n",
    "    file.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 345,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "file = open('./data.txt', 'r', encoding = 'utf-8')\n",
    "data_dict = {}\n",
    "def add_data(d, data):\n",
    "    data['title'].append(d['title'])\n",
    "    data['up'].append(d['upper']['name'])\n",
    "    data['info'].append(d['intro'])\n",
    "    data['aid'].append(d['id'])\n",
    "    data['bid'].append(d['bvid'])\n",
    "while True:\n",
    "    line = file.readline()\n",
    "    if not line: break\n",
    "    if ('********' in line): \n",
    "        dict_key = line.replace('*', '').replace('\\n', '')\n",
    "        data_dict[dict_key] = {\n",
    "            'title' : [],\n",
    "            'up' : [],\n",
    "            'info' : [],\n",
    "            'aid' : [],\n",
    "            'bid' : []\n",
    "        }\n",
    "        continue\n",
    "    add_data(eval(line), data_dict[dict_key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 364,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 注意DataFrame只能接受类似{key:val}形式的字典，而不能用字典嵌套...\n",
    "for k in data_dict.keys():\n",
    "    df = pd.DataFrame(data_dict[k])\n",
    "    df.to_csv('./' + k + '.csv')\n",
    "# df = pd.DataFrame(data_dict['粪坑10'])\n",
    "# df['up'].value_counts()\n",
    "# tst = {\"title\":[1, 2 ,3, 4]}\n",
    "# # tst['title'].append({\"1\":114})\n",
    "# tdf = pd.DataFrame(tst)\n",
    "# tdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 392,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "声剧 0\n",
      "粪坑0 415\n",
      "粪坑1 354\n",
      "粪坑2 195\n",
      "粪坑3 179\n",
      "粪坑4 137\n",
      "粪坑5 132\n",
      "粪坑6 110\n",
      "粪坑7 65\n",
      "粪坑8 74\n",
      "粪坑9 32\n",
      "粪坑10 26\n",
      "素材0 366\n",
      "素材1 93\n",
      "素材2 19\n",
      "收藏了13162个视频\n",
      "被橄榄2197个视频\n",
      "削除比例16.69%\n",
      "那明天呢？__倒闭的日子！\n"
     ]
    }
   ],
   "source": [
    "likes = ['声剧']\n",
    "for i in range(11):\n",
    "    likes.append('粪坑' + str(i))\n",
    "for i in range(3):\n",
    "    likes.append('素材' + str(i))\n",
    "tot = video_cnt = 0\n",
    "for lk in range(len(likes)):\n",
    "    df = pd.read_csv('./'+ likes[lk] +'.csv')\n",
    "    video_cnt += len(df['title'])\n",
    "    try:\n",
    "        temp = int(df['title'].value_counts()['已失效视频'])\n",
    "    except KeyError:\n",
    "        temp = 0\n",
    "    tot += temp\n",
    "    print(likes[lk] + ' %d' % (temp, ))\n",
    "print(\"收藏了%d个视频\" % (video_cnt, ))\n",
    "print(\"被橄榄%d个视频\" % (tot, ))\n",
    "print(\"削除比例%.2f%%\" % ((tot / video_cnt) * 100, ))\n",
    "print(\"那明天呢？__倒闭的日子！\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'hello world'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'hello world'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
